RU23: Data Analysis Assignment¶
# imports
import xarray as xr
import pandas as pd
import matplotlib.pyplot as plt
import cool_maps.plot as cplt
import cartopy.crs as ccrs
import cmocean.cm as cmo
import numpy as np
%matplotlib inline
# Now we add a few animation-specific imports...
from IPython import display
from matplotlib import animation
from mpl_toolkits.axes_grid1 import make_axes_locatable
#plt.rcParams['animation.ffmpeg_path'] = 'Whatever_your_path_is'
First we load in the Fall Deployment Data¶
# Fall 2024 deployment: delayed-mode science profile data, requested as CSV.
# The URL was changed so that it points at the actual fall 2024 dataset.
fall_24_url = 'https://slocum-data.marine.rutgers.edu/erddap/tabledap/ru23-20241022T1512-profile-sci-delayed.csv?time%2Clatitude%2Clongitude%2Cdepth%2Cbeta_470nm%2Cbeta_532nm%2Cbeta_660nm%2Ccdom%2Cdensity%2Csalinity%2Ctemperature&time%3E=2024-10-28T00%3A00%3A00Z&time%3C=2024-11-04T14%3A46%3A11Z'
# 'time' is the first requested column: parse it as datetimes and use it as
# the index. File row 1 is skipped (presumably the units row that follows the
# header -- confirm against the server's CSV layout).
fall_24 = pd.read_csv(
    fall_24_url,
    index_col='time',
    parse_dates=['time'],
    skiprows=[1],
)
fall_24
| latitude | longitude | depth | beta_470nm | beta_532nm | beta_660nm | cdom | density | salinity | temperature | |
|---|---|---|---|---|---|---|---|---|---|---|
| time | ||||||||||
| 2024-10-28 00:00:01+00:00 | 38.749018 | -73.155370 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 2024-10-28 00:00:01+00:00 | 38.749017 | -73.155370 | 0.188566 | NaN | NaN | NaN | NaN | 1024.63610 | 34.602318 | 19.3378 |
| 2024-10-28 00:00:02+00:00 | 38.749015 | -73.155371 | 0.168717 | NaN | NaN | NaN | NaN | 1024.63880 | 34.605064 | 19.3353 |
| 2024-10-28 00:00:04+00:00 | 38.749010 | -73.155372 | 0.188566 | NaN | NaN | NaN | NaN | 1024.64280 | 34.608883 | 19.3311 |
| 2024-10-28 00:00:06+00:00 | 38.749005 | -73.155373 | 0.198490 | NaN | NaN | NaN | NaN | 1024.64330 | 34.608963 | 19.3296 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2024-11-04 14:46:04+00:00 | 39.415072 | -74.147837 | 0.238174 | NaN | NaN | NaN | NaN | 1023.22864 | 31.910190 | 16.6631 |
| 2024-11-04 14:46:06+00:00 | 39.415074 | -74.147831 | 0.178631 | NaN | NaN | NaN | NaN | 1023.22850 | 31.910170 | 16.6625 |
| 2024-11-04 14:46:07+00:00 | 39.415075 | -74.147829 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 2024-11-04 14:46:08+00:00 | 39.415077 | -74.147825 | 0.248098 | NaN | NaN | NaN | NaN | 1023.23140 | 31.912520 | 16.6593 |
| 2024-11-04 14:46:10+00:00 | 39.415079 | -74.147819 | 0.218326 | NaN | NaN | NaN | NaN | 1023.22906 | 31.909775 | 16.6596 |
538563 rows × 10 columns
Next we load in the ongoing Spring Deployment Data¶
# Spring 2025 deployment: real-time ("-rt") science profile data, requested as
# CSV. The URL was altered so that it only contains the main data window.
spring_25_url = 'https://slocum-data.marine.rutgers.edu/erddap/tabledap/ru23-20250226T1720-profile-sci-rt.csv?time%2Clatitude%2Clongitude%2Cdepth%2Cbeta_470nm%2Cbeta_532nm%2Cbeta_660nm%2Ccdom%2Cdensity%2Csalinity%2Ctemperature&time%3E=2025-03-04T00%3A00%3A00Z&time%3C=2025-03-11T15%3A07%3A57Z'
# 'time' is the first requested column: parse it as datetimes and use it as
# the index. File row 1 is skipped (presumably the units row that follows the
# header -- confirm against the server's CSV layout).
spring_25 = pd.read_csv(
    spring_25_url,
    index_col='time',
    parse_dates=['time'],
    skiprows=[1],
)
spring_25
| latitude | longitude | depth | beta_470nm | beta_532nm | beta_660nm | cdom | density | salinity | temperature | |
|---|---|---|---|---|---|---|---|---|---|---|
| time | ||||||||||
| 2025-03-04 00:25:31+00:00 | 38.818310 | -73.206875 | 0.148867 | 0.000000 | 0.000000 | 0.000000 | 0.0000 | 1025.8278 | 32.871750 | 6.3670 |
| 2025-03-04 00:25:39+00:00 | 38.818310 | -73.206875 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 2025-03-04 00:25:39+00:00 | 38.818310 | -73.206875 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 2025-03-04 00:26:51+00:00 | 38.819369 | -73.207754 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 2025-03-04 00:27:05+00:00 | 38.819576 | -73.207926 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2025-03-11 15:07:24+00:00 | 39.455647 | -74.210111 | 6.966449 | 0.001981 | 0.002664 | 0.001303 | 0.0937 | 1025.7179 | 32.446970 | 4.7047 |
| 2025-03-11 15:07:32+00:00 | 39.455667 | -74.210106 | 5.587071 | 0.001893 | 0.002110 | 0.001201 | 0.2811 | 1025.7065 | 32.439970 | 4.6985 |
| 2025-03-11 15:07:40+00:00 | 39.455687 | -74.210101 | 4.356539 | 0.001911 | 0.002139 | 0.001334 | -0.7496 | 1025.6951 | 32.432865 | 4.6993 |
| 2025-03-11 15:07:49+00:00 | 39.455710 | -74.210096 | 2.778669 | 0.001893 | 0.002225 | 0.001138 | -0.0937 | 1025.6904 | 32.435940 | 4.6958 |
| 2025-03-11 15:07:54+00:00 | 39.455723 | -74.210093 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
35226 rows × 10 columns
Now to clean up the data in time a little, resample both at 1 minute means and clear out NaNs¶
# Average the fall data into 1-minute bins, then discard every bin that still
# has a missing value in any column.
fall_24 = fall_24.resample('1Min').mean()
fall_24 = fall_24.dropna()
# Rebuild the index from its raw datetime64 values; this is what drops the
# "+00:00" offset and the "time" index name seen in the table loaded above.
fall_24.index = pd.DatetimeIndex(fall_24.index.values)
fall_24
| latitude | longitude | depth | beta_470nm | beta_532nm | beta_660nm | cdom | density | salinity | temperature | |
|---|---|---|---|---|---|---|---|---|---|---|
| 2024-10-28 01:25:00 | 38.750907 | -73.165888 | 4.175808 | 0.000382 | 0.000384 | 0.000129 | -0.222104 | 1024.700945 | 34.739656 | 19.560333 |
| 2024-10-28 01:26:00 | 38.750932 | -73.166012 | 11.024788 | 0.000396 | 0.000388 | 0.000130 | -0.217626 | 1024.731642 | 34.739828 | 19.558821 |
| 2024-10-28 01:27:00 | 38.750955 | -73.166134 | 17.932806 | 0.000393 | 0.000386 | 0.000128 | -0.144809 | 1024.762328 | 34.740405 | 19.559514 |
| 2024-10-28 01:28:00 | 38.750980 | -73.166258 | 24.842421 | 0.000394 | 0.000390 | 0.000132 | -0.124933 | 1024.791900 | 34.742370 | 19.568837 |
| 2024-10-28 01:29:00 | 38.751003 | -73.166379 | 31.428266 | 0.000394 | 0.000386 | 0.000131 | -0.202041 | 1024.822133 | 34.745053 | 19.572300 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2024-11-01 16:51:00 | 39.277980 | -73.900751 | 28.261178 | 0.000972 | 0.001178 | 0.000654 | 0.128837 | 1023.884047 | 32.749395 | 17.148127 |
| 2024-11-01 16:52:00 | 39.277950 | -73.900768 | 27.250349 | 0.000841 | 0.001095 | 0.000565 | 0.187400 | 1023.824427 | 32.683623 | 17.168167 |
| 2024-11-01 16:53:00 | 39.277920 | -73.900786 | 16.469317 | 0.000544 | 0.000647 | 0.000252 | 0.139130 | 1023.542583 | 32.404021 | 17.251411 |
| 2024-11-01 16:54:00 | 39.277891 | -73.900804 | 8.448400 | 0.000524 | 0.000569 | 0.000220 | 0.249867 | 1023.154937 | 31.962309 | 17.309500 |
| 2024-11-01 16:55:00 | 39.277861 | -73.900821 | 5.421219 | 0.000543 | 0.000544 | 0.000229 | 0.140550 | 1023.119861 | 31.936652 | 17.317728 |
1205 rows × 10 columns
# Average the spring data into 1-minute bins, then discard every bin that
# still has a missing value in any column.
spring_25 = spring_25.resample('1Min').mean()
spring_25 = spring_25.dropna()
# Rebuild the index from its raw datetime64 values; this is what drops the
# "+00:00" offset and the "time" index name seen in the table loaded above.
spring_25.index = pd.DatetimeIndex(spring_25.index.values)
spring_25
| latitude | longitude | depth | beta_470nm | beta_532nm | beta_660nm | cdom | density | salinity | temperature | |
|---|---|---|---|---|---|---|---|---|---|---|
| 2025-03-04 00:25:00 | 38.818310 | -73.206875 | 0.148867 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 1025.827800 | 32.871750 | 6.367000 |
| 2025-03-04 01:31:00 | 38.819869 | -73.200771 | 3.215498 | 0.000403 | 0.000497 | 0.000182 | -0.374800 | 1025.840100 | 32.871655 | 6.381300 |
| 2025-03-04 01:32:00 | 38.819867 | -73.200667 | 8.479011 | 0.000419 | 0.000503 | 0.000217 | -0.409938 | 1025.862062 | 32.871540 | 6.400412 |
| 2025-03-04 01:33:00 | 38.819865 | -73.200542 | 18.088630 | 0.000436 | 0.001050 | 0.000222 | -0.374800 | 1025.906000 | 32.871678 | 6.407214 |
| 2025-03-04 01:34:00 | 38.819862 | -73.200431 | 26.828282 | 0.000428 | 0.000758 | 0.000208 | -0.481886 | 1025.977700 | 32.923213 | 6.481757 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2025-03-11 14:52:00 | 39.453400 | -74.210613 | 8.520174 | 0.002184 | 0.002690 | 0.001534 | 0.000000 | 1025.747729 | 32.472252 | 4.681143 |
| 2025-03-11 14:53:00 | 39.453535 | -74.210583 | 3.264932 | 0.001940 | 0.002285 | 0.001315 | -0.062467 | 1025.691150 | 32.438724 | 4.731725 |
| 2025-03-11 15:05:00 | 39.455366 | -74.210173 | 7.479621 | 0.002189 | 0.002526 | 0.001469 | 0.174014 | 1025.729500 | 32.455123 | 4.679157 |
| 2025-03-11 15:06:00 | 39.455511 | -74.210141 | 14.001013 | 0.002622 | 0.003006 | 0.001965 | -0.104111 | 1025.785156 | 32.486544 | 4.677367 |
| 2025-03-11 15:07:00 | 39.455656 | -74.210108 | 6.197360 | 0.001993 | 0.002343 | 0.001340 | -0.109317 | 1025.717950 | 32.448923 | 4.684750 |
2112 rows × 10 columns
Data Activity Assignment: RU23 Science Profile Prompts¶
1A. Plot TS diagrams for each deployment. Both plots should be colored by time, have density contours, and have matching axis limits.¶
# Store each deployment's datetime index as a numeric 'date' column; the TS
# plotting function reads its time values from that column by default.
for _deployment in (fall_24, spring_25):
    _deployment['date'] = pd.to_numeric(_deployment.index)
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import gsw # Gibbs Seawater Oceanographic Toolbox
def plot_ts_diagram(df, sal_col='salinity', temp_col='temperature', time_col='date',
                    title='TS Diagram', xlim=None, ylim=None):
    """Draw a temperature-salinity scatter coloured by time over density contours.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding the salinity, temperature and time columns.
    sal_col, temp_col : str
        Names of the salinity and temperature columns.
    time_col : str
        Name of a column that ``pd.to_datetime`` can convert; it colours the
        points.
    title : str
        Axes title.
    xlim : tuple of (low, high), optional
        Salinity axis limits. Defaults to ``(31, 36)``, the limits this
        function has always used.
    ylim : tuple of (low, high), optional
        Temperature axis limits. Defaults to the data range padded on both
        sides by 10 % of the absolute maximum temperature.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    # Matplotlib date numbers let the colourbar be formatted as dates.
    time_numeric = mdates.date2num(pd.to_datetime(df[time_col]))
    salt = df[sal_col]
    temp = df[temp_col]

    if xlim is None:
        xlim = (31, 36)
    if ylim is None:
        pad = 0.1 * abs(temp.max())
        ylim = (temp.min() - pad, temp.max() + pad)
    smin, smax = xlim
    tmin, tmax = ylim

    # Build the contour grid over the *plotted* limits so the isopycnals cover
    # the whole axes rather than only the span of the data. Contouring needs
    # at least a 2 x 2 grid, hence the lower bound.
    xdim = max(int((smax - smin) / 0.1) + 1, 2)
    ydim = max(int(tmax - tmin) + 1, 2)
    si = np.linspace(smin, smax, xdim)
    ti = np.linspace(tmin, tmax, ydim)
    sal_grid, temp_grid = np.meshgrid(si, ti)

    # gsw.rho works on whole arrays, so the grid is evaluated in one call at
    # 0 dbar pressure.
    # NOTE(review): gsw.rho is documented for Absolute Salinity and
    # Conservative Temperature; the grid values are passed unconverted, so the
    # contours are approximate if the columns hold other quantities.
    dens = gsw.rho(sal_grid, temp_grid, 0)

    fig, ax = plt.subplots(figsize=(10, 6))

    # One dashed contour per whole density unit.
    density_levels = np.arange(np.floor(dens.min()), np.ceil(dens.max()) + 1, 1)
    contours = ax.contour(si, ti, dens, levels=density_levels,
                          colors='gray', linestyles='dashed')
    ax.clabel(contours, fontsize=10, inline=1, fmt='%1.0f')

    # Scatter plot coloured by time.
    sc = ax.scatter(salt, temp, c=time_numeric, cmap='viridis', s=10, edgecolor='none')

    # Colourbar with date-formatted tick labels.
    cbar = fig.colorbar(sc, ax=ax)
    cbar.set_label('Date')
    cbar.ax.yaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
    plt.setp(cbar.ax.yaxis.get_majorticklabels(), rotation=45)

    ax.set_xlabel('Salinity')
    ax.set_ylabel('Temperature (°C)')
    ax.set_title(title)
    ax.set_xlim(smin, smax)
    ax.set_ylim(tmin, tmax)
    ax.grid(True)
    plt.tight_layout()
    plt.show()
# Render one TS diagram per deployment.
for _frame, _heading in ((fall_24, 'TS Plot: Fall 2024'),
                         (spring_25, 'TS Plot: Spring 2025')):
    plot_ts_diagram(_frame, title=_heading)

# Temperature (y-axis) limits common to both deployments: the overall extremes
# widened by 0.1 on each side.
tmin = min(frame['temperature'].min() for frame in (fall_24, spring_25)) - 0.1
tmax = max(frame['temperature'].max() for frame in (fall_24, spring_25)) + 0.1
shared_ylim = (tmin, tmax)

# Salinity (x-axis) limits common to both deployments: the overall extremes
# scaled by 1 % outward.
smin = min(frame['salinity'].min() for frame in (fall_24, spring_25)) * 0.99
smax = max(frame['salinity'].max() for frame in (fall_24, spring_25)) * 1.01
shared_xlim = (smin, smax)
def plot_ts_diagram(df, sal_col='salinity', temp_col='temperature', title='TS Diagram',
                    xlim=None, ylim=None):
    """Draw a TS diagram on caller-supplied axis limits so several plots match.

    Points are coloured by the frame's datetime index and overlaid on dashed
    density contours computed across the full axis limits.

    Parameters
    ----------
    df : pandas.DataFrame
        Table indexed by datetime, holding the salinity and temperature
        columns.
    sal_col, temp_col : str
        Names of the salinity and temperature columns.
    title : str
        Axes title.
    xlim : tuple of (low, high), optional
        Salinity axis limits. When omitted, the data range scaled outward by
        1 % is used.
    ylim : tuple of (low, high), optional
        Temperature axis limits. When omitted, the data range widened by 0.1
        on each side is used.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    # Matplotlib date numbers let the colourbar be formatted as dates.
    time_numeric = mdates.date2num(df.index)
    salt = df[sal_col]
    temp = df[temp_col]

    # The signature advertises None defaults; unpacking None used to raise
    # TypeError, so fall back to limits derived from the data itself.
    if xlim is None:
        xlim = (salt.min() * 0.99, salt.max() * 1.01)
    if ylim is None:
        ylim = (temp.min() - 0.1, temp.max() + 0.1)
    smin, smax = xlim
    tmin, tmax = ylim

    # Contouring needs at least a 2 x 2 grid, hence the lower bound.
    xdim = max(int((smax - smin) / 0.1) + 1, 2)
    ydim = max(int(tmax - tmin) + 1, 2)
    si = np.linspace(smin, smax, xdim)
    ti = np.linspace(tmin, tmax, ydim)
    sal_grid, temp_grid = np.meshgrid(si, ti)

    # gsw.rho works on whole arrays, so the grid is evaluated in one call at
    # 0 dbar pressure.
    # NOTE(review): gsw.rho is documented for Absolute Salinity and
    # Conservative Temperature; the grid values are passed unconverted, so the
    # contours are approximate if the columns hold other quantities.
    dens = gsw.rho(sal_grid, temp_grid, 0)

    fig, ax = plt.subplots(figsize=(10, 6))
    contours = ax.contour(si, ti, dens, colors='gray', linestyles='dashed')
    ax.clabel(contours, fontsize=10, inline=1, fmt='%1.0f')

    sc = ax.scatter(salt, temp, c=time_numeric, cmap='viridis', s=10, edgecolor='none')

    # Colourbar with date-formatted tick labels.
    cbar = fig.colorbar(sc, ax=ax)
    cbar.set_label('Date')
    cbar.ax.yaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
    plt.setp(cbar.ax.yaxis.get_majorticklabels(), rotation=45)

    ax.set_xlabel('Salinity')
    ax.set_ylabel('Temperature (°C)')
    ax.set_title(title)
    ax.set_xlim(xlim)
    ax.set_ylim(ylim)
    ax.grid(True)
    plt.tight_layout()
    plt.show()
# Draw both deployments on the same salinity/temperature limits.
for _frame, _heading in ((fall_24, 'TS Plot: Fall 2024'),
                         (spring_25, 'TS Plot: Spring 2025')):
    plot_ts_diagram(_frame, title=_heading, xlim=shared_xlim, ylim=shared_ylim)
1B. What differences do you see in the two plots? Why are there these differences (besides just being different times)?¶
Because the two deployments took place in different seasons, the TS plots are noticeably different. There is a large difference in temperature: the spring deployment shows much cooler water. In addition to the temperature, the salinity range is much smaller in the spring. The variability is due to the change in ocean conditions, as the spring deployment spans the transition from winter into spring, while the fall deployment spans the transition from fall into winter.
2A. Create time series plots for the data for each deployment. Each deployment should be a separate column of plots, and the figure should be formatted as a 7 x 2 of plots, where each plot should be time (x) and depth (y), colored by the specific variable. Be sure to label your plots.¶
# Columns drawn in the time/depth figure, in plotting order.
variables = [
    'beta_470nm',
    'beta_532nm',
    'beta_660nm',
    'cdom',
    'density',
    'salinity',
    'temperature',
]
def plot_time_depth_series(fall_df, spring_df, variables):
    """Plot time (x) against depth (y) for every variable, one column per deployment.

    The figure has one row per entry in ``variables`` and two columns: the
    fall frame on the left and the spring frame on the right. Each panel is a
    scatter coloured by the variable and carries its own colourbar.

    Parameters
    ----------
    fall_df, spring_df : pandas.DataFrame
        Datetime-indexed tables with a ``depth`` column plus the columns named
        in ``variables``. The inputs are copied, not modified.
    variables : sequence of str
        Column names to plot. A panel is hidden when its column is missing
        from that deployment's frame.

    Raises
    ------
    ValueError
        If ``variables`` is empty.
    """
    import matplotlib.dates as mdates
    import matplotlib.pyplot as plt

    variables = list(variables)
    if not variables:
        raise ValueError('variables must name at least one column to plot')

    # Work on copies with a numeric, non-negative depth; rows whose depth is
    # missing cannot be placed on the y-axis and are dropped.
    deployments = []
    for label, frame in (('Fall 2024', fall_df), ('Spring 2025', spring_df)):
        frame = frame.copy()
        frame['depth'] = pd.to_numeric(frame['depth'], errors='coerce').abs()
        frame = frame.dropna(subset=['depth'])
        deployments.append((label, frame))

    # The y-axis is shared by every panel, so the limits must come from both
    # deployments together. Setting them per panel let the last panel drawn
    # overwrite the rest and could clip the other deployment.
    shallowest = min(frame['depth'].min() for _, frame in deployments)
    deepest = max(frame['depth'].max() for _, frame in deployments)

    # One row per variable (4 inches tall each) instead of a fixed 7 rows;
    # squeeze=False keeps ``axes`` two-dimensional even for a single row.
    n_rows = len(variables)
    fig, axes = plt.subplots(nrows=n_rows, ncols=2, figsize=(16, 4 * n_rows),
                             sharey=True, squeeze=False)

    for row, var in enumerate(variables):
        for col, (label, frame) in enumerate(deployments):
            ax = axes[row, col]
            if var not in frame.columns:
                ax.set_visible(False)
                continue

            times = pd.to_datetime(frame.index)
            points = ax.scatter(times, frame['depth'], c=frame[var],
                                cmap='viridis', s=5, edgecolors='none')

            # Each column spans its own deployment's full time range.
            ax.set_xlim(frame.index.min(), frame.index.max())

            if col == 0:
                ax.set_ylabel('Depth (m)', fontsize=10)
            if row == 0:
                ax.set_title(label, fontsize=12)
            if row == n_rows - 1:
                ax.set_xlabel('Time', fontsize=10)

            cbar = fig.colorbar(points, ax=ax, orientation='vertical', pad=0.01)
            cbar.set_label(var)

            ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
            ax.tick_params(axis='x', labelrotation=45)

    # Larger value first flips the axis so depth increases downward; because
    # the y-axis is shared, setting it once applies to every panel.
    axes[0, 0].set_ylim(deepest, shallowest)

    plt.tight_layout()
    plt.subplots_adjust(hspace=0.4)
    plt.show()
# Fall deployment in the left column, spring deployment in the right column.
plot_time_depth_series(fall_df=fall_24, spring_df=spring_25, variables=variables)
2B. Describe all of the detail seen across these two missions, highlighting significant similarities/differences in each variable.¶
The beta readings are generally higher during the spring mission. There was less variability in density during the spring mission, but with similar maximum values. For salinity, there was a high-salinity pocket at depth during the spring mission. The fall mission has higher salinity at depth, but the water column was well mixed. Temperature is much lower in spring, but a warm pocket occurs at depth, whereas the fall mission shows higher temperatures at the surface.
3A. Create a single, rotating 3D plot visualizing temperature during both deployments.¶
# Tag every row with its deployment, then stack both deployments into one
# table for the 3D plot.
fall_df = fall_24.copy()
fall_df['deployment'] = 'Fall 2024'
spring_df = spring_25.copy()
spring_df['deployment'] = 'Spring 2025'
df = pd.concat([fall_df, spring_df])

# Coerce first, filter second: a value that cannot be parsed becomes NaN
# during coercion, so dropping NaNs has to happen afterwards or those rows
# would reach the plot.
_plot_cols = ['temperature', 'depth', 'latitude', 'longitude']
df[_plot_cols] = df[_plot_cols].apply(pd.to_numeric, errors='coerce')
# Depth is plotted as a non-negative distance below the surface.
df['depth'] = df['depth'].abs()
df = df.dropna(subset=_plot_cols)
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import animation
from IPython.display import HTML
def rotating_3d_two_colorbars(df):
    """Build a rotating 3D scatter of temperature for both deployments.

    Rows are split on the ``deployment`` column. Each deployment is drawn at
    (longitude, latitude, depth) with its own colormap, scaled to that
    deployment's own temperature range, and gets its own colourbar.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with ``deployment``, ``longitude``, ``latitude``, ``depth`` and
        ``temperature`` columns.

    Returns
    -------
    matplotlib.animation.FuncAnimation
        Animation stepping the azimuth from 0 to 356 degrees in 4-degree
        increments. The figure is closed before returning.
    """
    figure = plt.figure(figsize=(14, 8))
    ax3d = figure.add_subplot(111, projection='3d')

    # (deployment label, colormap, marker, marker size, colourbar pad, accent colour)
    styles = (
        ('Fall 2024', 'Reds', 'o', 10, 0.01, 'darkred'),
        ('Spring 2025', 'Blues', 'x', 15, 0.05, 'darkblue'),
    )

    # Draw every deployment first; the colourbars are attached afterwards in
    # the same order.
    drawn = []
    for name, cmap, marker, size, pad, accent in styles:
        subset = df[df['deployment'] == name]
        temps = subset['temperature']
        artist = ax3d.scatter(
            subset['longitude'], subset['latitude'], subset['depth'],
            c=temps, cmap=cmap,
            vmin=temps.min(), vmax=temps.max(),
            marker=marker, s=size, alpha=0.8, label=name,
        )
        drawn.append((artist, name, pad, accent))

    ax3d.set_xlabel('Longitude')
    ax3d.set_ylabel('Latitude')
    ax3d.set_zlabel('Depth (m)')
    ax3d.set_title('3D Temperature by Deployment (Two Colorbars)')
    # Larger value first flips the z-axis so depth increases downward.
    ax3d.set_zlim(df['depth'].max(), df['depth'].min())

    for artist, name, pad, accent in drawn:
        bar = figure.colorbar(artist, ax=ax3d, pad=pad, fraction=0.025)
        bar.set_label(f'{name} Temp (°C)', color=accent)
        bar.ax.yaxis.set_tick_params(color=accent)
        bar.outline.set_edgecolor(accent)

    ax3d.legend()

    def spin(azimuth):
        # Keep the elevation fixed and move only the azimuth.
        ax3d.view_init(elev=20, azim=azimuth)
        return figure,

    movie = animation.FuncAnimation(
        figure, spin, frames=np.arange(0, 360, 4), interval=100
    )
    # Close the figure so only the animation is displayed, not a static copy.
    plt.close(figure)
    return movie
anim = rotating_3d_two_colorbars(df)
# The embedded animation was larger than matplotlib's 20 MB embed limit, so
# frames were dropped from the end. Raise the limit (value in MB) only while
# rendering so the global setting is left untouched.
with plt.rc_context({'animation.embed_limit': 100}):
    _anim_html = anim.to_jshtml()
HTML(_anim_html)
Animation size has reached 21088161 bytes, exceeding the limit of 20971520.0. If you're sure you want a larger animation embedded, set the animation.embed_limit rc parameter to a larger value (in MB). This and further frames will be dropped.